In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build one combined table from the per-year name files (1880 through 2016).
# NOTE(review): the location below is an absolute, machine-specific path; the
# notebook only runs where that directory exists.
years = range(1880, 2017)
columns = ['name', 'sex', 'births']
pieces = [
    # Each file is read with explicit column names (no header row is expected)
    # and every row is tagged with the year taken from the file name.
    pd.read_csv(
        'C:/Users/User/Desktop/python/pandas/Pandas_Python3/names/yob%d.txt' % year,
        names=columns,
    ).assign(year=year)
    for year in years
]
# Stack the yearly frames and renumber the rows 0..n-1.
names = pd.concat(pieces, ignore_index=True)
In [2]:
# Rows whose sex is 'M'; preview the first five.
male_names = names.loc[names['sex'].eq('M')]
male_names.head()
Out[2]:
In [3]:
# Rows whose sex is 'F'; preview the first five.
female_names = names.loc[names['sex'].eq('F')]
female_names.head()
Out[3]:
In [4]:
# Size of the combined table as (number of rows, number of columns).
names.shape
Out[4]:
In [5]:
# Row count of the inner join of the male and female 'name' columns.
# A name may occur on many rows on each side, so this counts
# (male row, female row) pairs rather than distinct shared names.
male_names[['name']].merge(female_names[['name']], on='name').shape[0]
Out[5]:
In [6]:
# The 'name' column of the inner join between male and female rows
# (one entry per matching male-row/female-row pair, so names repeat).
male_names[['name']].merge(female_names[['name']], on='name')['name']
Out[6]:
In [7]:
# Distinct names that occur with both sexes.
# A membership filter replaces the many-to-many merge: the merge materialised
# every (male row, female row) pair for each name only to deduplicate them
# afterwards. pd.unique keeps names in order of first appearance among the
# male rows.
pd.unique(male_names.loc[male_names['name'].isin(female_names['name']), 'name'])
Out[7]:
In [8]:
# Array of distinct names that occur with both sexes.
# A membership filter replaces the many-to-many merge: the merge materialised
# every (male row, female row) pair for each name only to deduplicate them
# afterwards. pd.unique keeps names in order of first appearance among the
# male rows.
common_names = pd.unique(
    male_names.loc[male_names['name'].isin(female_names['name']), 'name']
)
common_names
Out[8]:
In [17]:
len(common_names) # number of unique names shared by both sexes
Out[17]:
In [10]:
# Total births per shared name, summed over all years and both sexes,
# sorted from the largest total to the smallest.
# Only 'births' is aggregated: summing the whole frame would also "sum" the
# string 'sex' column (concatenated on pandas >= 2.0, silently dropped on
# older versions), which made the result depend on the pandas version.
names_counts = (
    names.loc[names['name'].isin(common_names), ['name', 'births']]
    .groupby('name')
    .sum()
    .sort_values('births', ascending=False)
)
In [16]:
# Plot total births per year (both sexes combined) for three names.
# A single loop replaces three copy-pasted stanzas that reused the variable
# `james` for every name and overwrote the global `years` range with a list.
fig, ax = plt.subplots()
for given_name in ['James', 'John', 'Robert']:
    # Sum births over all rows of this name within each year.
    births_per_year = (
        names.loc[names['name'] == given_name]
        .groupby('year')['births']
        .sum()
    )
    # The label travels with the line, so the legend cannot fall out of sync
    # with the plotting order.
    ax.plot(births_per_year.index, births_per_year.values, label=given_name)
ax.set_xlabel('Года')
ax.set_ylabel('Кол-во рождений')
ax.legend()
plt.show()
In [ ]: